"""
@name : 词频统计 (word frequency count)
@author : shiyaling
@projectname : xsfh
"""
import re
from collections import Counter
# Word-frequency count for Walden.txt: read the text, split it into
# lower-cased words, count them, and print (word, count) pairs from the most
# to the least frequent.

# Read the whole file. Read-only mode is sufficient because the file is never
# written, and the context manager guarantees the handle is closed even if
# reading or decoding fails.
with open("Walden.txt", "r", encoding="utf-8") as fp:
    data = fp.read()

# A word is a maximal run of letters: [^\W\d_] matches any word character
# that is neither a digit nor an underscore. Every punctuation mark, digit
# and whitespace character therefore acts as a separator, so "who?" and
# "who" are counted as the same word. Words are lower-cased so that counting
# is case-insensitive.
words = (word.lower() for word in re.findall(r"[^\W\d_]+", data))

# Count occurrences of each word.
word_counts = Counter(words)

# most_common() with no argument returns every (word, count) pair sorted by
# descending count; words with equal counts stay in first-seen order.
lst1 = word_counts.most_common()
print(lst1)







